{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "562ef876",
   "metadata": {},
   "source": [
    "# 数据爬取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6443325f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "这是第1页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第11页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第12页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第13页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第14页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第15页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第16页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第17页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第18页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第19页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第20页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第21页，接下来将先等待3秒...然后以继续抓取\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>...</th>\n",
       "      <th>job.campusJobKind</th>\n",
       "      <th>job.dataPromId</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>recruiter.imId</th>\n",
       "      <th>recruiter.imUserType</th>\n",
       "      <th>recruiter.chatted</th>\n",
       "      <th>recruiter.recruiterId</th>\n",
       "      <th>recruiter.recruiterPhoto</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22jobKind%22%3A%222%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"deb609da5f83fec8a1e79f0c0b23f46a\",\"...</td>\n",
       "      <td>12457241</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>5fa50dff1fcf636a62f8342702u.jpg</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>国信证券深圳互联网分公司</td>\n",
       "      <td>https://www.liepin.com/company/12457241/</td>\n",
       "      <td>基金/证券/期货</td>\n",
       "      <td>[自媒体运营, 内容运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>实习</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>邓女士</td>\n",
       "      <td>人力资源主管</td>\n",
       "      <td>0724b479f957b36106342a441a87c2a0</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>deb609da5f83fec8a1e79f0c0b23f46a</td>\n",
       "      <td>5f8f986779c7cc70efbf36c008u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"ee0a6f0531b74a507d77f77e4b52ce46\",\"...</td>\n",
       "      <td>9547284</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>5f8f9abfdfb13a7dee3432ee08u.jpg</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>零一裂变(深圳)科技有限公司</td>\n",
       "      <td>https://www.liepin.com/company/9547284/</td>\n",
       "      <td>互联网</td>\n",
       "      <td>[大专, 直播运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>应届</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>李女士</td>\n",
       "      <td>HR</td>\n",
       "      <td>1f26af67e8c76bb552e4223b98a2bc60</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>ee0a6f0531b74a507d77f77e4b52ce46</td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"331bd86e2452b34aa6c2d1d19db4aa85\",\"...</td>\n",
       "      <td>13765355</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td></td>\n",
       "      <td>深圳市逸龙云上科技有限公司</td>\n",
       "      <td>https://www.liepin.com/company/13765355/</td>\n",
       "      <td>IT服务</td>\n",
       "      <td>[大专, 直播运营, 直播平台, 抖音平台]</td>\n",
       "      <td>...</td>\n",
       "      <td>应届</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>赵先生</td>\n",
       "      <td>人事经理</td>\n",
       "      <td>464db0c221563abe058646d4efc6c3fb</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>331bd86e2452b34aa6c2d1d19db4aa85</td>\n",
       "      <td>5f8f986bdfb13a7dee342f2108u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...</td>\n",
       "      <td>13133475</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>迪晨文化</td>\n",
       "      <td>https://www.liepin.com/company/13133475/</td>\n",
       "      <td>专业技术服务</td>\n",
       "      <td>[大专, 电商直播, 线下运营, 线上运营, 抖音平台, 直播运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>应届</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>许女士</td>\n",
       "      <td>运营总监</td>\n",
       "      <td>a07dd74399e29e669552a237d109804b</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>494e1776274e3ba3cd4ccb5fcdc273cd</td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...</td>\n",
       "      <td>13133475</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>迪晨文化</td>\n",
       "      <td>https://www.liepin.com/company/13133475/</td>\n",
       "      <td>专业技术服务</td>\n",
       "      <td>[大专, 直播电商, 电商平台, 直播带货经验, 抖音平台]</td>\n",
       "      <td>...</td>\n",
       "      <td>应届</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>许女士</td>\n",
       "      <td>运营总监</td>\n",
       "      <td>a07dd74399e29e669552a237d109804b</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>494e1776274e3ba3cd4ccb5fcdc273cd</td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"f920bfa6b7342844cffcf433b32a4d3a\",\"im...</td>\n",
       "      <td>12782305</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>狮鹫国际教育科技(深圳)有限公司</td>\n",
       "      <td>https://www.liepin.com/company/12782305/</td>\n",
       "      <td>专业技术服务</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>朱女士</td>\n",
       "      <td>市场经理</td>\n",
       "      <td>f920bfa6b7342844cffcf433b32a4d3a</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>55314411e980c8125bccecbcbcec23ec</td>\n",
       "      <td>5f8f98648dbe6273dcf8515508u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"53709b9553f5838bc6ad1568e455f51a\",\"im...</td>\n",
       "      <td>9813897</td>\n",
       "      <td>D轮</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>深圳今日头条科技有限公司</td>\n",
       "      <td>https://www.liepin.com/company/9813897/</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>[linux, mysql, redis, java, php, golang]</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>申先生</td>\n",
       "      <td>软件工程师</td>\n",
       "      <td>53709b9553f5838bc6ad1568e455f51a</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>dbdc6f45427246d47083376beee36d15</td>\n",
       "      <td>5f8f986aea60860b75384fab08u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"85d96157b5c2f7e877f292afcce1a326\",\"im...</td>\n",
       "      <td>12758947</td>\n",
       "      <td>天使轮</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>深圳市热刻影视广告有限公司</td>\n",
       "      <td>https://www.liepin.com/company/12758947/</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>黄先生</td>\n",
       "      <td>人事行政专员</td>\n",
       "      <td>85d96157b5c2f7e877f292afcce1a326</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>de3e0f8da0ebf908238c2a85c3e62424</td>\n",
       "      <td>5f8f986bdfb13a7dee342f2108u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"3021fd11d274365a1806a0cca0964ad2\",\"im...</td>\n",
       "      <td>13526525</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td></td>\n",
       "      <td>利豪(深圳)贸易有限公司</td>\n",
       "      <td>https://www.liepin.com/company/13526525/</td>\n",
       "      <td>批发/零售</td>\n",
       "      <td>[抖音主播, 教育主播, 数码主播]</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>吴先生</td>\n",
       "      <td>总经理</td>\n",
       "      <td>3021fd11d274365a1806a0cca0964ad2</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>383c8ff219bbc6a3bd0e5cc6eccc5364</td>\n",
       "      <td>6468813140a327112a30bf3e06u.png</td>\n",
       "      <td>经验不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22jobKind%22%3A%222%22%2C%22sfrom%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"2\",\"userId\":\"8421f31315305acf52a0e...</td>\n",
       "      <td>13247583</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>佛山雨声艺术培训有限公司</td>\n",
       "      <td>https://www.liepin.com/company/13247583/</td>\n",
       "      <td>培训服务</td>\n",
       "      <td>[新媒体运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=20&amp;...</td>\n",
       "      <td>张女士</td>\n",
       "      <td>教务管理</td>\n",
       "      <td>b519265c857233b9c68319df6cd38213</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>8421f31315305acf52a0e4216d974e3a</td>\n",
       "      <td>5f8f98648dbe6273dcf8515508u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>801 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             dataInfo  \\\n",
       "0   %7B%22jobKind%22%3A%222%22%2C%22ckId%22%3A%22y...   \n",
       "1   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "2   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "3   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "4   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "..                                                ...   \n",
       "36  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "37  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "38  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "39  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "0   %7B%22jobKind%22%3A%222%22%2C%22sfrom%22%3A%22...   \n",
       "\n",
       "                                           dataParams  comp.compId  \\\n",
       "0   {\"userId\":\"deb609da5f83fec8a1e79f0c0b23f46a\",\"...     12457241   \n",
       "1   {\"userId\":\"ee0a6f0531b74a507d77f77e4b52ce46\",\"...      9547284   \n",
       "2   {\"userId\":\"331bd86e2452b34aa6c2d1d19db4aa85\",\"...     13765355   \n",
       "3   {\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...     13133475   \n",
       "4   {\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...     13133475   \n",
       "..                                                ...          ...   \n",
       "36  {\"imId\":\"f920bfa6b7342844cffcf433b32a4d3a\",\"im...     12782305   \n",
       "37  {\"imId\":\"53709b9553f5838bc6ad1568e455f51a\",\"im...      9813897   \n",
       "38  {\"imId\":\"85d96157b5c2f7e877f292afcce1a326\",\"im...     12758947   \n",
       "39  {\"imId\":\"3021fd11d274365a1806a0cca0964ad2\",\"im...     13526525   \n",
       "0   {\"jobKind\":\"2\",\"userId\":\"8421f31315305acf52a0e...     13247583   \n",
       "\n",
       "   comp.compStage                    comp.compLogo comp.compScale  \\\n",
       "0           融资未公开  5fa50dff1fcf636a62f8342702u.jpg       10000人以上   \n",
       "1           融资未公开  5f8f9abfdfb13a7dee3432ee08u.jpg       500-999人   \n",
       "2             NaN  61b07937d0458d53c627567e02u.jpg                  \n",
       "3             NaN  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "4             NaN  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "..            ...                              ...            ...   \n",
       "36            NaN  61b07937d0458d53c627567e02u.jpg         50-99人   \n",
       "37             D轮  61b07937d0458d53c627567e02u.jpg       10000人以上   \n",
       "38            天使轮  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "39            NaN  61b07937d0458d53c627567e02u.jpg                  \n",
       "0             NaN  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "\n",
       "       comp.compName                                 comp.link  \\\n",
       "0       国信证券深圳互联网分公司  https://www.liepin.com/company/12457241/   \n",
       "1     零一裂变(深圳)科技有限公司   https://www.liepin.com/company/9547284/   \n",
       "2      深圳市逸龙云上科技有限公司  https://www.liepin.com/company/13765355/   \n",
       "3               迪晨文化  https://www.liepin.com/company/13133475/   \n",
       "4               迪晨文化  https://www.liepin.com/company/13133475/   \n",
       "..               ...                                       ...   \n",
       "36  狮鹫国际教育科技(深圳)有限公司  https://www.liepin.com/company/12782305/   \n",
       "37      深圳今日头条科技有限公司   https://www.liepin.com/company/9813897/   \n",
       "38     深圳市热刻影视广告有限公司  https://www.liepin.com/company/12758947/   \n",
       "39      利豪(深圳)贸易有限公司  https://www.liepin.com/company/13526525/   \n",
       "0       佛山雨声艺术培训有限公司  https://www.liepin.com/company/13247583/   \n",
       "\n",
       "   comp.compIndustry                                job.labels  ...  \\\n",
       "0           基金/证券/期货                             [自媒体运营, 内容运营]  ...   \n",
       "1                互联网                                [大专, 直播运营]  ...   \n",
       "2               IT服务                    [大专, 直播运营, 直播平台, 抖音平台]  ...   \n",
       "3             专业技术服务        [大专, 电商直播, 线下运营, 线上运营, 抖音平台, 直播运营]  ...   \n",
       "4             专业技术服务            [大专, 直播电商, 电商平台, 直播带货经验, 抖音平台]  ...   \n",
       "..               ...                                       ...  ...   \n",
       "36            专业技术服务                                      [英语]  ...   \n",
       "37           在线社交/媒体  [linux, mysql, redis, java, php, golang]  ...   \n",
       "38           在线社交/媒体                                        []  ...   \n",
       "39             批发/零售                        [抖音主播, 教育主播, 数码主播]  ...   \n",
       "0               培训服务                                   [新媒体运营]  ...   \n",
       "\n",
       "   job.campusJobKind                                     job.dataPromId  \\\n",
       "0                 实习  d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...   \n",
       "1                 应届  d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...   \n",
       "2                 应届  d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...   \n",
       "3                 应届  d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...   \n",
       "4                 应届  d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...   \n",
       "..               ...                                                ...   \n",
       "36               NaN  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...   \n",
       "37               NaN  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...   \n",
       "38               NaN  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...   \n",
       "39               NaN  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...   \n",
       "0                NaN  d_sfrom=search_prime&d_ckId=null&d_curPage=20&...   \n",
       "\n",
       "   recruiter.recruiterName recruiter.recruiterTitle  \\\n",
       "0                      邓女士                   人力资源主管   \n",
       "1                      李女士                       HR   \n",
       "2                      赵先生                     人事经理   \n",
       "3                      许女士                     运营总监   \n",
       "4                      许女士                     运营总监   \n",
       "..                     ...                      ...   \n",
       "36                     朱女士                     市场经理   \n",
       "37                     申先生                    软件工程师   \n",
       "38                     黄先生                   人事行政专员   \n",
       "39                     吴先生                      总经理   \n",
       "0                      张女士                     教务管理   \n",
       "\n",
       "                      recruiter.imId recruiter.imUserType recruiter.chatted  \\\n",
       "0   0724b479f957b36106342a441a87c2a0                    2             False   \n",
       "1   1f26af67e8c76bb552e4223b98a2bc60                    2             False   \n",
       "2   464db0c221563abe058646d4efc6c3fb                    2             False   \n",
       "3   a07dd74399e29e669552a237d109804b                    2             False   \n",
       "4   a07dd74399e29e669552a237d109804b                    2             False   \n",
       "..                               ...                  ...               ...   \n",
       "36  f920bfa6b7342844cffcf433b32a4d3a                    2             False   \n",
       "37  53709b9553f5838bc6ad1568e455f51a                    2             False   \n",
       "38  85d96157b5c2f7e877f292afcce1a326                    2             False   \n",
       "39  3021fd11d274365a1806a0cca0964ad2                    2             False   \n",
       "0   b519265c857233b9c68319df6cd38213                    2             False   \n",
       "\n",
       "               recruiter.recruiterId         recruiter.recruiterPhoto  \\\n",
       "0   deb609da5f83fec8a1e79f0c0b23f46a  5f8f986779c7cc70efbf36c008u.jpg   \n",
       "1   ee0a6f0531b74a507d77f77e4b52ce46  5f8f9863f6d1ab58476f246a08u.jpg   \n",
       "2   331bd86e2452b34aa6c2d1d19db4aa85  5f8f986bdfb13a7dee342f2108u.jpg   \n",
       "3   494e1776274e3ba3cd4ccb5fcdc273cd  5f8f9863f6d1ab58476f246a08u.jpg   \n",
       "4   494e1776274e3ba3cd4ccb5fcdc273cd  5f8f9863f6d1ab58476f246a08u.jpg   \n",
       "..                               ...                              ...   \n",
       "36  55314411e980c8125bccecbcbcec23ec  5f8f98648dbe6273dcf8515508u.jpg   \n",
       "37  dbdc6f45427246d47083376beee36d15  5f8f986aea60860b75384fab08u.jpg   \n",
       "38  de3e0f8da0ebf908238c2a85c3e62424  5f8f986bdfb13a7dee342f2108u.jpg   \n",
       "39  383c8ff219bbc6a3bd0e5cc6eccc5364  6468813140a327112a30bf3e06u.png   \n",
       "0   8421f31315305acf52a0e4216d974e3a  5f8f98648dbe6273dcf8515508u.jpg   \n",
       "\n",
       "    job.requireWorkYears  \n",
       "0                    NaN  \n",
       "1                    NaN  \n",
       "2                    NaN  \n",
       "3                    NaN  \n",
       "4                    NaN  \n",
       "..                   ...  \n",
       "36                  经验不限  \n",
       "37                  经验不限  \n",
       "38                  经验不限  \n",
       "39                  经验不限  \n",
       "0                   经验不限  \n",
       "\n",
       "[801 rows x 32 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import crawl_liepin\n",
    "\n",
    "# Crawl Liepin job listings for city 深圳 and keyword 直播, then display the result.\n",
    "af = crawl_liepin.crawl(\n",
    "    城市=\"深圳\",\n",
    "    关键词=\"直播\",\n",
    ")\n",
    "af"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "06699ea2",
   "metadata": {},
   "source": [
    "# 详细页面的基本字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ce8bd5f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests_html import HTMLSession\n",
    "# Single session object reused for every detail-page request made with `session.get`.\n",
    "session = HTMLSession()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ed7c6846",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seconds to wait for each detail page before giving up; without a timeout a\n",
    "# single stalled connection would hang the whole sequential loop.\n",
    "REQUEST_TIMEOUT = 10\n",
    "\n",
    "# One entry per row of `af`, in row order; None marks a page that could not be scraped.\n",
    "技能要求 = []\n",
    "\n",
    "for url in af['job.link']:\n",
    "    try:\n",
    "        s = session.get(url, timeout=REQUEST_TIMEOUT)\n",
    "        # Fail on 4xx/5xx so an error page is never recorded as a job description.\n",
    "        s.raise_for_status()\n",
    "        command = s.html.find('dd')\n",
    "        if not command:\n",
    "            # Report the real cause instead of a bare \"list index out of range\".\n",
    "            raise LookupError(\"no <dd> element found on the page\")\n",
    "        # The text of the first <dd> on the page is what gets stored.\n",
    "        技能要求.append(command[0].text)\n",
    "    except Exception as e:\n",
    "        # Best-effort scrape: log the failure and append a placeholder so the\n",
    "        # list keeps exactly one entry per URL (swap None for another default if needed).\n",
    "        print(f\"Error processing URL {url}: {e}\")\n",
    "        技能要求.append(None)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c24446ed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>...</th>\n",
       "      <th>job.dataPromId</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>recruiter.imId</th>\n",
       "      <th>recruiter.imUserType</th>\n",
       "      <th>recruiter.chatted</th>\n",
       "      <th>recruiter.recruiterId</th>\n",
       "      <th>recruiter.recruiterPhoto</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "      <th>技能要求</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22jobKind%22%3A%222%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"deb609da5f83fec8a1e79f0c0b23f46a\",\"...</td>\n",
       "      <td>12457241</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>5fa50dff1fcf636a62f8342702u.jpg</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>国信证券深圳互联网分公司</td>\n",
       "      <td>https://www.liepin.com/company/12457241/</td>\n",
       "      <td>基金/证券/期货</td>\n",
       "      <td>[自媒体运营, 内容运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>邓女士</td>\n",
       "      <td>人力资源主管</td>\n",
       "      <td>0724b479f957b36106342a441a87c2a0</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>deb609da5f83fec8a1e79f0c0b23f46a</td>\n",
       "      <td>5f8f986779c7cc70efbf36c008u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>工作职责： 1、负责投资者教育相关的文案内容撰写及短视频剪辑。 2、负责策划线上直播与投教活...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"ee0a6f0531b74a507d77f77e4b52ce46\",\"...</td>\n",
       "      <td>9547284</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>5f8f9abfdfb13a7dee3432ee08u.jpg</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>零一裂变(深圳)科技有限公司</td>\n",
       "      <td>https://www.liepin.com/company/9547284/</td>\n",
       "      <td>互联网</td>\n",
       "      <td>[大专, 直播运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>李女士</td>\n",
       "      <td>HR</td>\n",
       "      <td>1f26af67e8c76bb552e4223b98a2bc60</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>ee0a6f0531b74a507d77f77e4b52ce46</td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>职责描述： 1、协助主播做直播流程规划、品类选款准备，直播间设备仪器、灯光等准备工作。 2、...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"331bd86e2452b34aa6c2d1d19db4aa85\",\"...</td>\n",
       "      <td>13765355</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td></td>\n",
       "      <td>深圳市逸龙云上科技有限公司</td>\n",
       "      <td>https://www.liepin.com/company/13765355/</td>\n",
       "      <td>IT服务</td>\n",
       "      <td>[大专, 直播运营, 直播平台, 抖音平台]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>赵先生</td>\n",
       "      <td>人事经理</td>\n",
       "      <td>464db0c221563abe058646d4efc6c3fb</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>331bd86e2452b34aa6c2d1d19db4aa85</td>\n",
       "      <td>5f8f986bdfb13a7dee342f2108u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>岗位职责: 1、负责抖音直播平台的整体规划，平台主播进行资源对接; 2、负责抖音直播运营事宜...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...</td>\n",
       "      <td>13133475</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>迪晨文化</td>\n",
       "      <td>https://www.liepin.com/company/13133475/</td>\n",
       "      <td>专业技术服务</td>\n",
       "      <td>[大专, 电商直播, 线下运营, 线上运营, 抖音平台, 直播运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>许女士</td>\n",
       "      <td>运营总监</td>\n",
       "      <td>a07dd74399e29e669552a237d109804b</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>494e1776274e3ba3cd4ccb5fcdc273cd</td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>一、工作内容： 1. 开播前和运营确定直播流程，熟悉产品，有问题及时调整； 2.调节直播间气...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...</td>\n",
       "      <td>{\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...</td>\n",
       "      <td>13133475</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>迪晨文化</td>\n",
       "      <td>https://www.liepin.com/company/13133475/</td>\n",
       "      <td>专业技术服务</td>\n",
       "      <td>[大专, 直播电商, 电商平台, 直播带货经验, 抖音平台]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=0&amp;d...</td>\n",
       "      <td>许女士</td>\n",
       "      <td>运营总监</td>\n",
       "      <td>a07dd74399e29e669552a237d109804b</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>494e1776274e3ba3cd4ccb5fcdc273cd</td>\n",
       "      <td>5f8f9863f6d1ab58476f246a08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>一、工作内容： 1、直播带货主播 2、通过抖音直播形式向粉丝介绍产品信息，回答粉丝问题，引导...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"f920bfa6b7342844cffcf433b32a4d3a\",\"im...</td>\n",
       "      <td>12782305</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>狮鹫国际教育科技(深圳)有限公司</td>\n",
       "      <td>https://www.liepin.com/company/12782305/</td>\n",
       "      <td>专业技术服务</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>朱女士</td>\n",
       "      <td>市场经理</td>\n",
       "      <td>f920bfa6b7342844cffcf433b32a4d3a</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>55314411e980c8125bccecbcbcec23ec</td>\n",
       "      <td>5f8f98648dbe6273dcf8515508u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>岗位职责： 1、根据公司的升学服务流程，为学生制定留学方案，并负责全程规划申请； 2、与学生...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"53709b9553f5838bc6ad1568e455f51a\",\"im...</td>\n",
       "      <td>9813897</td>\n",
       "      <td>D轮</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>深圳今日头条科技有限公司</td>\n",
       "      <td>https://www.liepin.com/company/9813897/</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>[linux, mysql, redis, java, php, golang]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>申先生</td>\n",
       "      <td>软件工程师</td>\n",
       "      <td>53709b9553f5838bc6ad1568e455f51a</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>dbdc6f45427246d47083376beee36d15</td>\n",
       "      <td>5f8f986aea60860b75384fab08u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>团队介绍： 国际化电商是以国际化短视频产品为载体的内容电商业务，致力于成为用户发现并获取优价...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"85d96157b5c2f7e877f292afcce1a326\",\"im...</td>\n",
       "      <td>12758947</td>\n",
       "      <td>天使轮</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>深圳市热刻影视广告有限公司</td>\n",
       "      <td>https://www.liepin.com/company/12758947/</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>黄先生</td>\n",
       "      <td>人事行政专员</td>\n",
       "      <td>85d96157b5c2f7e877f292afcce1a326</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>de3e0f8da0ebf908238c2a85c3e62424</td>\n",
       "      <td>5f8f986bdfb13a7dee342f2108u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>职责描述： 1喜欢游戏，喜欢聊天唠嗑，互动能力强 2引导下载、账号交易、虚拟充值等游戏带货，...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>%7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...</td>\n",
       "      <td>{\"imId\":\"3021fd11d274365a1806a0cca0964ad2\",\"im...</td>\n",
       "      <td>13526525</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td></td>\n",
       "      <td>利豪(深圳)贸易有限公司</td>\n",
       "      <td>https://www.liepin.com/company/13526525/</td>\n",
       "      <td>批发/零售</td>\n",
       "      <td>[抖音主播, 教育主播, 数码主播]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=19&amp;...</td>\n",
       "      <td>吴先生</td>\n",
       "      <td>总经理</td>\n",
       "      <td>3021fd11d274365a1806a0cca0964ad2</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>383c8ff219bbc6a3bd0e5cc6eccc5364</td>\n",
       "      <td>6468813140a327112a30bf3e06u.png</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>1.有3C数码带货经验，会起号，会自然流，可以手播脸播，会憋单自己可以写话术脚本。 2.擅长...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22jobKind%22%3A%222%22%2C%22sfrom%22%3A%22...</td>\n",
       "      <td>{\"jobKind\":\"2\",\"userId\":\"8421f31315305acf52a0e...</td>\n",
       "      <td>13247583</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>佛山雨声艺术培训有限公司</td>\n",
       "      <td>https://www.liepin.com/company/13247583/</td>\n",
       "      <td>培训服务</td>\n",
       "      <td>[新媒体运营]</td>\n",
       "      <td>...</td>\n",
       "      <td>d_sfrom=search_prime&amp;d_ckId=null&amp;d_curPage=20&amp;...</td>\n",
       "      <td>张女士</td>\n",
       "      <td>教务管理</td>\n",
       "      <td>b519265c857233b9c68319df6cd38213</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>8421f31315305acf52a0e4216d974e3a</td>\n",
       "      <td>5f8f98648dbe6273dcf8515508u.jpg</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>1:抖音操盘运营 2:抖音剪辑 3:直播\\n职位透镜\\n您与该职位的匹配度： 登录查看\\n登...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>801 rows × 33 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             dataInfo  \\\n",
       "0   %7B%22jobKind%22%3A%222%22%2C%22ckId%22%3A%22y...   \n",
       "1   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "2   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "3   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "4   %7B%22jobKind%22%3A%226%22%2C%22ckId%22%3A%22y...   \n",
       "..                                                ...   \n",
       "36  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "37  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "38  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "39  %7B%22ckId%22%3A%22y2jy1uvl2gar8xipyemg77rijoo...   \n",
       "0   %7B%22jobKind%22%3A%222%22%2C%22sfrom%22%3A%22...   \n",
       "\n",
       "                                           dataParams  comp.compId  \\\n",
       "0   {\"userId\":\"deb609da5f83fec8a1e79f0c0b23f46a\",\"...     12457241   \n",
       "1   {\"userId\":\"ee0a6f0531b74a507d77f77e4b52ce46\",\"...      9547284   \n",
       "2   {\"userId\":\"331bd86e2452b34aa6c2d1d19db4aa85\",\"...     13765355   \n",
       "3   {\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...     13133475   \n",
       "4   {\"userId\":\"494e1776274e3ba3cd4ccb5fcdc273cd\",\"...     13133475   \n",
       "..                                                ...          ...   \n",
       "36  {\"imId\":\"f920bfa6b7342844cffcf433b32a4d3a\",\"im...     12782305   \n",
       "37  {\"imId\":\"53709b9553f5838bc6ad1568e455f51a\",\"im...      9813897   \n",
       "38  {\"imId\":\"85d96157b5c2f7e877f292afcce1a326\",\"im...     12758947   \n",
       "39  {\"imId\":\"3021fd11d274365a1806a0cca0964ad2\",\"im...     13526525   \n",
       "0   {\"jobKind\":\"2\",\"userId\":\"8421f31315305acf52a0e...     13247583   \n",
       "\n",
       "   comp.compStage                    comp.compLogo comp.compScale  \\\n",
       "0           融资未公开  5fa50dff1fcf636a62f8342702u.jpg       10000人以上   \n",
       "1           融资未公开  5f8f9abfdfb13a7dee3432ee08u.jpg       500-999人   \n",
       "2             NaN  61b07937d0458d53c627567e02u.jpg                  \n",
       "3             NaN  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "4             NaN  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "..            ...                              ...            ...   \n",
       "36            NaN  61b07937d0458d53c627567e02u.jpg         50-99人   \n",
       "37             D轮  61b07937d0458d53c627567e02u.jpg       10000人以上   \n",
       "38            天使轮  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "39            NaN  61b07937d0458d53c627567e02u.jpg                  \n",
       "0             NaN  61b07937d0458d53c627567e02u.jpg          1-49人   \n",
       "\n",
       "       comp.compName                                 comp.link  \\\n",
       "0       国信证券深圳互联网分公司  https://www.liepin.com/company/12457241/   \n",
       "1     零一裂变(深圳)科技有限公司   https://www.liepin.com/company/9547284/   \n",
       "2      深圳市逸龙云上科技有限公司  https://www.liepin.com/company/13765355/   \n",
       "3               迪晨文化  https://www.liepin.com/company/13133475/   \n",
       "4               迪晨文化  https://www.liepin.com/company/13133475/   \n",
       "..               ...                                       ...   \n",
       "36  狮鹫国际教育科技(深圳)有限公司  https://www.liepin.com/company/12782305/   \n",
       "37      深圳今日头条科技有限公司   https://www.liepin.com/company/9813897/   \n",
       "38     深圳市热刻影视广告有限公司  https://www.liepin.com/company/12758947/   \n",
       "39      利豪(深圳)贸易有限公司  https://www.liepin.com/company/13526525/   \n",
       "0       佛山雨声艺术培训有限公司  https://www.liepin.com/company/13247583/   \n",
       "\n",
       "   comp.compIndustry                                job.labels  ...  \\\n",
       "0           基金/证券/期货                             [自媒体运营, 内容运营]  ...   \n",
       "1                互联网                                [大专, 直播运营]  ...   \n",
       "2               IT服务                    [大专, 直播运营, 直播平台, 抖音平台]  ...   \n",
       "3             专业技术服务        [大专, 电商直播, 线下运营, 线上运营, 抖音平台, 直播运营]  ...   \n",
       "4             专业技术服务            [大专, 直播电商, 电商平台, 直播带货经验, 抖音平台]  ...   \n",
       "..               ...                                       ...  ...   \n",
       "36            专业技术服务                                      [英语]  ...   \n",
       "37           在线社交/媒体  [linux, mysql, redis, java, php, golang]  ...   \n",
       "38           在线社交/媒体                                        []  ...   \n",
       "39             批发/零售                        [抖音主播, 教育主播, 数码主播]  ...   \n",
       "0               培训服务                                   [新媒体运营]  ...   \n",
       "\n",
       "                                       job.dataPromId recruiter.recruiterName  \\\n",
       "0   d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...                     邓女士   \n",
       "1   d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...                     李女士   \n",
       "2   d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...                     赵先生   \n",
       "3   d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...                     许女士   \n",
       "4   d_sfrom=search_prime&d_ckId=null&d_curPage=0&d...                     许女士   \n",
       "..                                                ...                     ...   \n",
       "36  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...                     朱女士   \n",
       "37  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...                     申先生   \n",
       "38  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...                     黄先生   \n",
       "39  d_sfrom=search_prime&d_ckId=null&d_curPage=19&...                     吴先生   \n",
       "0   d_sfrom=search_prime&d_ckId=null&d_curPage=20&...                     张女士   \n",
       "\n",
       "   recruiter.recruiterTitle                    recruiter.imId  \\\n",
       "0                    人力资源主管  0724b479f957b36106342a441a87c2a0   \n",
       "1                        HR  1f26af67e8c76bb552e4223b98a2bc60   \n",
       "2                      人事经理  464db0c221563abe058646d4efc6c3fb   \n",
       "3                      运营总监  a07dd74399e29e669552a237d109804b   \n",
       "4                      运营总监  a07dd74399e29e669552a237d109804b   \n",
       "..                      ...                               ...   \n",
       "36                     市场经理  f920bfa6b7342844cffcf433b32a4d3a   \n",
       "37                    软件工程师  53709b9553f5838bc6ad1568e455f51a   \n",
       "38                   人事行政专员  85d96157b5c2f7e877f292afcce1a326   \n",
       "39                      总经理  3021fd11d274365a1806a0cca0964ad2   \n",
       "0                      教务管理  b519265c857233b9c68319df6cd38213   \n",
       "\n",
       "   recruiter.imUserType recruiter.chatted             recruiter.recruiterId  \\\n",
       "0                     2             False  deb609da5f83fec8a1e79f0c0b23f46a   \n",
       "1                     2             False  ee0a6f0531b74a507d77f77e4b52ce46   \n",
       "2                     2             False  331bd86e2452b34aa6c2d1d19db4aa85   \n",
       "3                     2             False  494e1776274e3ba3cd4ccb5fcdc273cd   \n",
       "4                     2             False  494e1776274e3ba3cd4ccb5fcdc273cd   \n",
       "..                  ...               ...                               ...   \n",
       "36                    2             False  55314411e980c8125bccecbcbcec23ec   \n",
       "37                    2             False  dbdc6f45427246d47083376beee36d15   \n",
       "38                    2             False  de3e0f8da0ebf908238c2a85c3e62424   \n",
       "39                    2             False  383c8ff219bbc6a3bd0e5cc6eccc5364   \n",
       "0                     2             False  8421f31315305acf52a0e4216d974e3a   \n",
       "\n",
       "           recruiter.recruiterPhoto job.requireWorkYears  \\\n",
       "0   5f8f986779c7cc70efbf36c008u.jpg                  NaN   \n",
       "1   5f8f9863f6d1ab58476f246a08u.jpg                  NaN   \n",
       "2   5f8f986bdfb13a7dee342f2108u.jpg                  NaN   \n",
       "3   5f8f9863f6d1ab58476f246a08u.jpg                  NaN   \n",
       "4   5f8f9863f6d1ab58476f246a08u.jpg                  NaN   \n",
       "..                              ...                  ...   \n",
       "36  5f8f98648dbe6273dcf8515508u.jpg                 经验不限   \n",
       "37  5f8f986aea60860b75384fab08u.jpg                 经验不限   \n",
       "38  5f8f986bdfb13a7dee342f2108u.jpg                 经验不限   \n",
       "39  6468813140a327112a30bf3e06u.png                 经验不限   \n",
       "0   5f8f98648dbe6273dcf8515508u.jpg                 经验不限   \n",
       "\n",
       "                                                 技能要求  \n",
       "0   工作职责： 1、负责投资者教育相关的文案内容撰写及短视频剪辑。 2、负责策划线上直播与投教活...  \n",
       "1   职责描述： 1、协助主播做直播流程规划、品类选款准备，直播间设备仪器、灯光等准备工作。 2、...  \n",
       "2   岗位职责: 1、负责抖音直播平台的整体规划，平台主播进行资源对接; 2、负责抖音直播运营事宜...  \n",
       "3   一、工作内容： 1. 开播前和运营确定直播流程，熟悉产品，有问题及时调整； 2.调节直播间气...  \n",
       "4   一、工作内容： 1、直播带货主播 2、通过抖音直播形式向粉丝介绍产品信息，回答粉丝问题，引导...  \n",
       "..                                                ...  \n",
       "36  岗位职责： 1、根据公司的升学服务流程，为学生制定留学方案，并负责全程规划申请； 2、与学生...  \n",
       "37  团队介绍： 国际化电商是以国际化短视频产品为载体的内容电商业务，致力于成为用户发现并获取优价...  \n",
       "38  职责描述： 1喜欢游戏，喜欢聊天唠嗑，互动能力强 2引导下载、账号交易、虚拟充值等游戏带货，...  \n",
       "39  1.有3C数码带货经验，会起号，会自然流，可以手播脸播，会憋单自己可以写话术脚本。 2.擅长...  \n",
       "0   1:抖音操盘运营 2:抖音剪辑 3:直播\\n职位透镜\\n您与该职位的匹配度： 登录查看\\n登...  \n",
       "\n",
       "[801 rows x 33 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "af['技能要求'] = 技能要求\n",
    "af"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "08272a43",
   "metadata": {},
   "source": [
    "# 输出excel表格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "84c7dab7",
   "metadata": {},
   "outputs": [],
   "source": [
    "af.to_excel('深圳_直播.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d245ebcb",
   "metadata": {},
   "source": [
    "# 输出可视化图表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8dc40181",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "836b3d3a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\86135\\AppData\\Local\\Temp\\ipykernel_18712\\843216831.py:45: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  非薪资面议['平均薪资'] = 平均薪资\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['专业技术服务', '互联网', '批发/零售', '电子商务', '计算机软件']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "    df_PM_gz = af[\n",
    "        ['job.labels', 'job.refreshTime', 'job.title', 'job.salary', 'job.dq', 'job.topJob', 'job.requireWorkYears',\n",
    "         'job.requireEduLevel',  'comp.compName', 'comp.compIndustry', 'comp.compScale']]\n",
    "    df_PM_gz\n",
    "    df_PM_gz['job.dq'].value_counts()\n",
    "    [i for i in df_PM_gz['job.dq'].value_counts().index.tolist() if '-' in i]\n",
    "    地区 = [df_PM_gz['job.dq'].value_counts().index.tolist()[i].split('-')[1] \\\n",
    "            for i, v in enumerate(df_PM_gz['job.dq'].value_counts().index.tolist()) if '-' in v]\n",
    "    地区\n",
    "    岗位个数 = [df_PM_gz['job.dq'].value_counts().values.tolist()[i] for i, v in\n",
    "                enumerate(df_PM_gz['job.dq'].value_counts().index.tolist()) if '-' in v]\n",
    "    岗位个数\n",
    "    we = df_PM_gz\n",
    "    we\n",
    "    非薪资面议 = we[~we['job.salary'].str.contains(\"薪资面议\" and \"面议|元/天\")]\n",
    "\n",
    "    非薪资面议\n",
    "    非薪资面议_detail = 非薪资面议['job.salary'].apply(lambda x: x.split('薪')[0].split('·')).tolist()\n",
    "    非薪资面议_detail\n",
    "    平均薪资 = []\n",
    "\n",
    "    平均薪资 = []\n",
    "\n",
    "    for i in 非薪资面议_detail:\n",
    "        if '面议' in i[0]:\n",
    "            平均薪资.append(None)  # 面议情况下可以选择添加其他默认值或者 None\n",
    "        else:\n",
    "            salary_numbers = re.findall(r'\\d+', i[0])\n",
    "\n",
    "            # 添加对索引的检查\n",
    "            if len(salary_numbers) >= 2:\n",
    "                lower_salary = int(salary_numbers[0])\n",
    "                upper_salary = int(salary_numbers[1])\n",
    "                average_salary = (lower_salary + upper_salary) / 2\n",
    "\n",
    "                if len(i) == 1:\n",
    "                    平均薪资.append(average_salary)\n",
    "                elif len(i) >= 2:  # 添加对 i[1] 的检查\n",
    "                    平均薪资.append(round(average_salary * int(i[1]) / 12, 1))\n",
    "                else:\n",
    "                    平均薪资.append(None)  # 添加默认值或者 None\n",
    "            else:\n",
    "                平均薪资.append(None)  # 添加默认值或者 None\n",
    "\n",
    "    非薪资面议['平均薪资'] = 平均薪资\n",
    "    非薪资面议\n",
    "    # 分工作年限平均薪资\n",
    "    分年限_平均薪资 = 非薪资面议.groupby('job.requireWorkYears').agg({'平均薪资': 'median'})\n",
    "    分年限_平均薪资\n",
    "    年限 = 分年限_平均薪资.reindex(index=[\"经验不限\",\"一年以下\", \"1-3年\", \"3-5年\", \"5-10年\", \"10年以上\"])\n",
    "    年限\n",
    "    分年限_平均薪资_values = [round(i[0], 1) for i in 年限.values.tolist()]\n",
    "    分年限_平均薪资_values\n",
    "    分年限_平均薪资_index = 年限.index.tolist()\n",
    "    分年限_平均薪资_index\n",
    "    df_PM_gz['job.title']\n",
    "    # 还要合并回去原来的行\n",
    "    df_PM_gz['job.title'][df_PM_gz['job.title'].str.contains('（')].str.split('（').apply(lambda x: x[0])\n",
    "    # 处理过一些，清洗后的数据\n",
    "    df_job_title = df_PM_gz['job.title'].apply(lambda x: x.split('（')[0].split('/')[0].split('(')[0]).value_counts()\n",
    "    df_job_title\n",
    "    df_job_title.index.tolist()\n",
    "    # 列表推导式\n",
    "    PM_title_words = [(df_job_title.index.tolist()[i], df_job_title.values.tolist()[i]) for i in\n",
    "                      range(1, len(df_job_title.index.tolist()))]\n",
    "    PM_title_words\n",
    "    # 先统计每个job.title的数量\n",
    "    job_title_counts = 非薪资面议['job.title'].value_counts()\n",
    "\n",
    "    # 获取数量前五的job.title\n",
    "    top_5_job_titles = job_title_counts.head(5).index.fillna('')\n",
    "\n",
    "    # 根据数量前五的job.title筛选数据\n",
    "    分岗位_平均薪资 = 非薪资面议[非薪资面议['job.title'].isin(top_5_job_titles)].groupby('job.title').agg(\n",
    "        {'平均薪资': 'median'})\n",
    "\n",
    "    分岗位_平均薪资\n",
    "    分岗位_平均薪资_values = [round(i[0], 1) for i in 分岗位_平均薪资.values.tolist()]\n",
    "    分岗位_平均薪资_values\n",
    "    分岗位_平均薪资_index = 分岗位_平均薪资.index.tolist()\n",
    "    分岗位_平均薪资_index\n",
    "    # 首先去除'comp.compScale'中没有文字的行\n",
    "    非薪资面议 = 非薪资面议[非薪资面议['comp.compScale'].str.strip() != '']\n",
    "\n",
    "    # 先统计每个job.title的数量\n",
    "    job_title_counts = 非薪资面议['comp.compScale'].value_counts()\n",
    "\n",
    "    # 获取数量前五的job.title\n",
    "    top_5_job_titles = job_title_counts.head(9).index\n",
    "\n",
    "    # 根据数量前五的job.title筛选数据\n",
    "    公司规模 = 非薪资面议[非薪资面议['comp.compScale'].isin(top_5_job_titles)].groupby('comp.compScale').agg(\n",
    "        {'平均薪资': 'median'})\n",
    "    公司规模\n",
    "    公司规模_薪资 = [round(i[0], 1) for i in 公司规模.values.tolist()]\n",
    "    公司规模_薪资\n",
    "    公司规模_人数 = 公司规模.index.tolist()\n",
    "    公司规模_人数\n",
    "    # 行业的平均薪资\n",
    "    hire = 非薪资面议['comp.compIndustry'].value_counts()\n",
    "\n",
    "    top_5_hire = hire.head(5).index.fillna('')\n",
    "\n",
    "    分行业_平均薪资 = 非薪资面议[非薪资面议['comp.compIndustry'].isin(top_5_hire)].groupby('comp.compIndustry').agg(\n",
    "        {'平均薪资': 'median'})\n",
    "\n",
    "    分行业_平均薪资\n",
    "    分行业_平均薪资_values = [round(i[0], 1) for i in 分行业_平均薪资.values.tolist()]\n",
    "    分行业_平均薪资_values\n",
    "    分行业_平均薪资_index = 分行业_平均薪资.index.tolist()\n",
    "    分行业_平均薪资_index\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "227bf299",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts.commons.utils import JsCode\n",
    "from pyecharts.faker import Faker\n",
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Bar, Grid, Line,Map,Page,WordCloud,Pie,Tab\n",
    "from pyecharts.globals import SymbolType\n",
    "\n",
    "background_color_js = (\n",
    "    \"new echarts.graphic.LinearGradient(0, 0, 0, 1, \"\n",
    "    \"[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)\"\n",
    ")\n",
    "area_color_js = (\n",
    "    \"new echarts.graphic.LinearGradient(0, 0, 0, 1, \"\n",
    "    \"[{offset: 0, color: '#eb64fb'}, {offset: 1, color: '#3fbbff0d'}], false)\"\n",
    ")\n",
    "\n",
    "line = (\n",
    "    Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))\n",
    "    .add_xaxis(xaxis_data=分年限_平均薪资_index)  # 设置x轴数据为分年限_平均薪资_index\n",
    "    .add_yaxis(\n",
    "        series_name=\"薪资\",\n",
    "        y_axis=分年限_平均薪资_values,  # 设置y轴数据为分年限_平均薪资_values\n",
    "        is_smooth=True,\n",
    "        is_symbol_show=True,\n",
    "        symbol=\"circle\",\n",
    "        symbol_size=6,\n",
    "        linestyle_opts=opts.LineStyleOpts(color=\"#fff\"),\n",
    "        label_opts=opts.LabelOpts(is_show=True, position=\"top\", color=\"white\"),\n",
    "        itemstyle_opts=opts.ItemStyleOpts(\n",
    "            color=\"red\", border_color=\"#fff\", border_width=3\n",
    "        ),\n",
    "        tooltip_opts=opts.TooltipOpts(is_show=False),\n",
    "        areastyle_opts=opts.AreaStyleOpts(color=JsCode(area_color_js), opacity=1),\n",
    "    )\n",
    "    .set_global_opts(\n",
    "        title_opts=opts.TitleOpts(\n",
    "            title=\"直播的从业年限平均薪资\",\n",
    "            pos_left=\"5%\",  # 将标题移到左上角\n",
    "            pos_top=\"5%\",  # 将标题移到左上角\n",
    "            title_textstyle_opts=opts.TextStyleOpts(color=\"#fff\", font_size=16),\n",
    "        ),\n",
    "        xaxis_opts=opts.AxisOpts(\n",
    "            type_=\"category\",\n",
    "            boundary_gap=False,\n",
    "            axislabel_opts=opts.LabelOpts(margin=30, color=\"#ffffff63\"),\n",
    "            axisline_opts=opts.AxisLineOpts(is_show=False),\n",
    "            axistick_opts=opts.AxisTickOpts(\n",
    "                is_show=True,\n",
    "                length=25,\n",
    "                linestyle_opts=opts.LineStyleOpts(color=\"#ffffff1f\"),\n",
    "            ),\n",
    "            splitline_opts=opts.SplitLineOpts(\n",
    "                is_show=True, linestyle_opts=opts.LineStyleOpts(color=\"#ffffff1f\")\n",
    "            ),\n",
    "        ),\n",
    "        yaxis_opts=opts.AxisOpts(\n",
    "            type_=\"value\",\n",
    "            position=\"right\",\n",
    "            axislabel_opts=opts.LabelOpts(margin=20, color=\"#ffffff63\"),\n",
    "            axisline_opts=opts.AxisLineOpts(\n",
    "                linestyle_opts=opts.LineStyleOpts(width=2, color=\"#fff\")\n",
    "            ),\n",
    "            axistick_opts=opts.AxisTickOpts(\n",
    "                is_show=True,\n",
    "                length=15,\n",
    "                linestyle_opts=opts.LineStyleOpts(color=\"#ffffff1f\"),\n",
    "            ),\n",
    "            splitline_opts=opts.SplitLineOpts(\n",
    "                is_show=True, linestyle_opts=opts.LineStyleOpts(color=\"#ffffff1f\")\n",
    "            ),\n",
    "        ),\n",
    "        legend_opts=opts.LegendOpts(is_show=False),\n",
    "    )\n",
    "     .render(\"深圳_直播平均薪资折线图.html\")\n",
    "\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3de185f8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
